#!/usr/bin/env python
# -*- coding:utf-8 -*-
# author mz

import urllib2
import urllib
import re

# Fetch the data: open the article page; the body is read later in the script.
page_url = "http://python.jobbole.com/81341/"
response = urllib2.urlopen(page_url)

# Clean the data
def cleanData(html):
    """Extract the numbered paragraphs from *html*.

    A fragment matches when it is a ``<p>`` tag whose content starts with
    one or more digits followed by a full-width colon and at least one more
    character, closed by ``</p>`` on the same line (``.`` does not cross
    newlines).  The match is greedy, so it runs to the last ``</p>`` on
    that line.

    Returns the matched fragments in document order, each followed by a
    newline, concatenated into one string (empty when nothing matches).
    """
    matches = re.findall(r"<p>\d+：.+</p>", html)
    return ''.join(fragment + '\n' for fragment in matches)


# Read the page body and keep only the matching paragraphs.  The response is
# closed as soon as it has been read (even if reading or cleaning raises) so
# the underlying HTTP connection is not left open.
try:
    cleanInfo = cleanData(response.read())
finally:
    response.close()

# Save the cleaned text to html.txt (relative path, so it lands in the
# current working directory); an existing file is overwritten.
with open("html.txt","w") as fo:
    fo.write(cleanInfo)